import os
import urllib.request as ur

import lxml.etree as le
import user_agent

from tool import CookiePool

def getRequest(url):
    """Build a urllib Request for *url* carrying a random PC user agent and a pooled cookie.

    Relies on the module-level ``cookiePool`` being initialized (done in the
    ``__main__`` section) before the first call.
    """
    headers = {
        'User-Agent': user_agent.get_user_agent_pc(),
        'Cookie': cookiePool.get_cookie(),
    }
    return ur.Request(url=url, headers=headers)

def getProxyOpener():
    """Fetch a fresh proxy address from the data5u API and return an opener routed through it.

    Returns:
        urllib.request.OpenerDirector that sends HTTP traffic through the
        freshly fetched proxy. A new proxy is requested on every call.
    """
    # Use a context manager so the API response socket is closed promptly
    # instead of leaking until garbage collection.
    with ur.urlopen('http://api.ip.data5u.com/dynamic/get.html?order=d314e5e5e19b0dfd19762f98308114ba&sep=4') as resp:
        proxy_address = resp.read().decode('utf-8').strip()
    proxy_handler = ur.ProxyHandler(
        {
            'http': proxy_address
        }
    )
    return ur.build_opener(proxy_handler)


if __name__ == '__main__':
    keyword = input('请输入关键词:')
    pn_start = int(input('起始页:'))
    pn_end = int(input('终止页:'))

    # Seed the cookie pool from a couple of known article pages.
    cookiePool = CookiePool(
        urls=[
            'https://blog.csdn.net/kzl_knight/article/details/103187223',
            'https://blog.csdn.net/kzl_knight/article/details/103182569',
        ])
    cookiePool.run().start()

    # Ensure the output directory exists before any page is saved.
    os.makedirs('blog', exist_ok=True)

    for pn in range(pn_start, pn_end + 1):
        # Search-result page for this keyword / page number.
        request = getRequest(
            'https://so.csdn.net/so/search/s.do?p=%s&q=%s&t=blog&domain=&o=&s=&u=&l=&f=&rbg=0' % (pn, keyword)
        )
        try:
            response = getProxyOpener().open(request).read()
            # Extract blog links from the result list.
            href_s = le.HTML(response).xpath('//span[@class="down fr"]/../span[@class="link"]/a/@href')
            for href in href_s:
                try:
                    response_blog = getProxyOpener().open(
                        getRequest(href)
                    ).read()
                    title = le.HTML(response_blog).xpath('//h1[@class="title-article"]/text()')[0]
                    print(title)
                    # Strip characters that are illegal in file names (a title
                    # containing '/' would otherwise make open() fail).
                    safe_title = ''.join(c for c in title if c not in '\\/:*?"<>|')
                    with open('blog/%s.html' % safe_title, 'wb') as f:
                        f.write(response_blog)
                except Exception as e:
                    print(e)
        except Exception as e:
            # Was a silent bare `except: pass`; report the failure so a broken
            # proxy or page is visible, but keep crawling the remaining pages.
            print('page %s failed: %s' % (pn, e))

